home *** CD-ROM | disk | FTP | other *** search
/ PC World Komputer 2010 April / PCWorld0410.iso / hity wydania / Ubuntu 9.10 PL / karmelkowy-koliberek-9.10-netbook-remix-PL.iso / casper / filesystem.squashfs / usr / lib / python2.6 / email / feedparser.pyc (.txt) < prev    next >
Python Compiled Bytecode  |  2009-11-11  |  11KB  |  429 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.6)
  3.  
  4. """FeedParser - An email feed parser.
  5.  
  6. The feed parser implements an interface for incrementally parsing an email
  7. message, line by line.  This has advantages for certain applications, such as
  8. those reading email messages off a socket.
  9.  
  10. FeedParser.feed() is the primary interface for pushing new data into the
  11. parser.  It returns when there's nothing more it can do with the available
  12. data.  When you have no more data to push into the parser, call .close().
  13. This completes the parsing and returns the root message object.
  14.  
  15. The other advantage of this parser is that it will never throw a parsing
  16. exception.  Instead, when it finds something unexpected, it adds a 'defect' to
  17. the current message.  Defects are just instances that live on the message
  18. object's .defects attribute.
  19. """
  20. __all__ = [
  21.     'FeedParser']
  22. import re
  23. from email import errors
  24. from email import message
  25. NLCRE = re.compile('\r\n|\r|\n')
  26. NLCRE_bol = re.compile('(\r\n|\r|\n)')
  27. NLCRE_eol = re.compile('(\r\n|\r|\n)$')
  28. NLCRE_crack = re.compile('(\r\n|\r|\n)')
  29. headerRE = re.compile('^(From |[\\041-\\071\\073-\\176]{1,}:|[\\t ])')
  30. EMPTYSTRING = ''
  31. NL = '\n'
  32. NeedMoreData = object()
  33.  
  34. class BufferedSubFile(object):
  35.     '''A file-ish object that can have new data loaded into it.
  36.  
  37.     You can also push and pop line-matching predicates onto a stack.  When the
  38.     current predicate matches the current line, a false EOF response
  39.     (i.e. empty string) is returned instead.  This lets the parser adhere to a
  40.     simple abstraction -- it parses until EOF closes the current message.
  41.     '''
  42.     
  43.     def __init__(self):
  44.         self._partial = ''
  45.         self._lines = []
  46.         self._eofstack = []
  47.         self._closed = False
  48.  
  49.     
  50.     def push_eof_matcher(self, pred):
  51.         self._eofstack.append(pred)
  52.  
  53.     
  54.     def pop_eof_matcher(self):
  55.         return self._eofstack.pop()
  56.  
  57.     
  58.     def close(self):
  59.         self._lines.append(self._partial)
  60.         self._partial = ''
  61.         self._closed = True
  62.  
  63.     
  64.     def readline(self):
  65.         if not self._lines:
  66.             if self._closed:
  67.                 return ''
  68.             return NeedMoreData
  69.         line = self._lines.pop()
  70.         for ateof in self._eofstack[::-1]:
  71.             if ateof(line):
  72.                 self._lines.append(line)
  73.                 return ''
  74.         
  75.         return line
  76.  
  77.     
  78.     def unreadline(self, line):
  79.         if not line is not NeedMoreData:
  80.             raise AssertionError
  81.         self._lines.append(line)
  82.  
  83.     
  84.     def push(self, data):
  85.         '''Push some new data into this object.'''
  86.         data = self._partial + data
  87.         self._partial = ''
  88.         parts = NLCRE_crack.split(data)
  89.         self._partial = parts.pop()
  90.         lines = []
  91.         for i in range(len(parts) // 2):
  92.             lines.append(parts[i * 2] + parts[i * 2 + 1])
  93.         
  94.         self.pushlines(lines)
  95.  
  96.     
  97.     def pushlines(self, lines):
  98.         self._lines[:0] = lines[::-1]
  99.  
  100.     
  101.     def is_closed(self):
  102.         return self._closed
  103.  
  104.     
  105.     def __iter__(self):
  106.         return self
  107.  
  108.     
  109.     def next(self):
  110.         line = self.readline()
  111.         if line == '':
  112.             raise StopIteration
  113.         line == ''
  114.         return line
  115.  
  116.  
  117.  
  118. class FeedParser:
  119.     '''A feed-style parser of email.'''
  120.     
  121.     def __init__(self, _factory = message.Message):
  122.         '''_factory is called with no arguments to create a new message obj'''
  123.         self._factory = _factory
  124.         self._input = BufferedSubFile()
  125.         self._msgstack = []
  126.         self._parse = self._parsegen().next
  127.         self._cur = None
  128.         self._last = None
  129.         self._headersonly = False
  130.  
  131.     
  132.     def _set_headersonly(self):
  133.         self._headersonly = True
  134.  
  135.     
  136.     def feed(self, data):
  137.         '''Push more data into the parser.'''
  138.         self._input.push(data)
  139.         self._call_parse()
  140.  
  141.     
  142.     def _call_parse(self):
  143.         
  144.         try:
  145.             self._parse()
  146.         except StopIteration:
  147.             pass
  148.  
  149.  
  150.     
  151.     def close(self):
  152.         '''Parse all remaining data and return the root message object.'''
  153.         self._input.close()
  154.         self._call_parse()
  155.         root = self._pop_message()
  156.         if not not (self._msgstack):
  157.             raise AssertionError
  158.         if root.get_content_maintype() == 'multipart' and not root.is_multipart():
  159.             root.defects.append(errors.MultipartInvariantViolationDefect())
  160.         
  161.         return root
  162.  
  163.     
  164.     def _new_message(self):
  165.         msg = self._factory()
  166.         if self._cur and self._cur.get_content_type() == 'multipart/digest':
  167.             msg.set_default_type('message/rfc822')
  168.         
  169.         if self._msgstack:
  170.             self._msgstack[-1].attach(msg)
  171.         
  172.         self._msgstack.append(msg)
  173.         self._cur = msg
  174.         self._last = msg
  175.  
  176.     
  177.     def _pop_message(self):
  178.         retval = self._msgstack.pop()
  179.         if self._msgstack:
  180.             self._cur = self._msgstack[-1]
  181.         else:
  182.             self._cur = None
  183.         return retval
  184.  
  185.     
  186.     def _parsegen(self):
  187.         self._new_message()
  188.         headers = []
  189.         for line in self._input:
  190.             if line is NeedMoreData:
  191.                 yield NeedMoreData
  192.                 continue
  193.             
  194.             if not headerRE.match(line):
  195.                 if not NLCRE.match(line):
  196.                     self._input.unreadline(line)
  197.                 
  198.                 break
  199.             
  200.             headers.append(line)
  201.         
  202.         self._parse_headers(headers)
  203.         if self._headersonly:
  204.             lines = []
  205.             while True:
  206.                 line = self._input.readline()
  207.                 if line is NeedMoreData:
  208.                     yield NeedMoreData
  209.                     continue
  210.                 
  211.                 if line == '':
  212.                     break
  213.                 
  214.                 lines.append(line)
  215.             self._cur.set_payload(EMPTYSTRING.join(lines))
  216.             return None
  217.         if self._cur.get_content_type() == 'message/delivery-status':
  218.             while True:
  219.                 self._input.push_eof_matcher(NLCRE.match)
  220.                 for retval in self._parsegen():
  221.                     break
  222.                 
  223.                 msg = self._pop_message()
  224.                 self._input.pop_eof_matcher()
  225.                 while True:
  226.                     line = self._input.readline()
  227.                     break
  228.                     continue
  229.                     None if line is NeedMoreData else None if retval is NeedMoreData else self._headersonly
  230.                 while True:
  231.                     line = self._input.readline()
  232.                     if line is NeedMoreData:
  233.                         yield NeedMoreData
  234.                         continue
  235.                     
  236.                     break
  237.                 if line == '':
  238.                     break
  239.                 
  240.                 self._input.unreadline(line)
  241.             return None
  242.         if self._cur.get_content_maintype() == 'message':
  243.             for retval in self._parsegen():
  244.                 if retval is NeedMoreData:
  245.                     yield NeedMoreData
  246.                     self._cur.get_content_type() == 'message/delivery-status'
  247.                     continue
  248.                 
  249.                 break
  250.             
  251.             self._pop_message()
  252.             return None
  253.         if self._cur.get_content_maintype() == 'multipart':
  254.             boundary = self._cur.get_boundary()
  255.             if boundary is None:
  256.                 self._cur.defects.append(errors.NoBoundaryInMultipartDefect())
  257.                 lines = []
  258.                 for line in self._input:
  259.                     lines.append(line)
  260.                 
  261.                 self._cur.set_payload(EMPTYSTRING.join(lines))
  262.                 return None
  263.             separator = '--' + boundary
  264.             boundaryre = re.compile('(?P<sep>' + re.escape(separator) + ')(?P<end>--)?(?P<ws>[ \\t]*)(?P<linesep>\\r\\n|\\r|\\n)?$')
  265.             capturing_preamble = True
  266.             preamble = []
  267.             linesep = False
  268.             while True:
  269.                 line = self._input.readline()
  270.                 if line == '':
  271.                     break
  272.                 
  273.                 mo = boundaryre.match(line)
  274.                 if mo:
  275.                     if mo.group('end'):
  276.                         linesep = mo.group('linesep')
  277.                         break
  278.                     
  279.                     if capturing_preamble:
  280.                         if preamble:
  281.                             lastline = preamble[-1]
  282.                             eolmo = NLCRE_eol.search(lastline)
  283.                             if eolmo:
  284.                                 preamble[-1] = lastline[:-len(eolmo.group(0))]
  285.                             
  286.                             self._cur.preamble = EMPTYSTRING.join(preamble)
  287.                         
  288.                         capturing_preamble = False
  289.                         self._input.unreadline(line)
  290.                         continue
  291.                     
  292.                     while True:
  293.                         line = self._input.readline()
  294.                         if line is NeedMoreData:
  295.                             yield NeedMoreData
  296.                             continue
  297.                         
  298.                         mo = boundaryre.match(line)
  299.                         if not mo:
  300.                             self._input.unreadline(line)
  301.                             break
  302.                             continue
  303.                     self._input.push_eof_matcher(boundaryre.match)
  304.                     for retval in self._parsegen():
  305.                         if retval is NeedMoreData:
  306.                             yield NeedMoreData
  307.                             continue
  308.                         
  309.                         break
  310.                     
  311.                     if self._last.get_content_maintype() == 'multipart':
  312.                         epilogue = self._last.epilogue
  313.                         if epilogue == '':
  314.                             self._last.epilogue = None
  315.                         elif epilogue is not None:
  316.                             mo = NLCRE_eol.search(epilogue)
  317.                             if mo:
  318.                                 end = len(mo.group(0))
  319.                                 self._last.epilogue = epilogue[:-end]
  320.                             
  321.                         
  322.                     else:
  323.                         payload = self._last.get_payload()
  324.                         if isinstance(payload, basestring):
  325.                             mo = NLCRE_eol.search(payload)
  326.                             if mo:
  327.                                 payload = payload[:-len(mo.group(0))]
  328.                                 self._last.set_payload(payload)
  329.                             
  330.                         
  331.                     self._input.pop_eof_matcher()
  332.                     self._pop_message()
  333.                     self._last = self._cur
  334.                     continue
  335.                 if not capturing_preamble:
  336.                     raise AssertionError
  337.                 preamble.append(line)
  338.                 continue
  339.                 capturing_preamble
  340.             if capturing_preamble:
  341.                 self._cur.defects.append(errors.StartBoundaryNotFoundDefect())
  342.                 self._cur.set_payload(EMPTYSTRING.join(preamble))
  343.                 epilogue = []
  344.                 for line in self._input:
  345.                     if line is NeedMoreData:
  346.                         yield NeedMoreData
  347.                         continue
  348.                         continue
  349.                 
  350.                 self._cur.epilogue = EMPTYSTRING.join(epilogue)
  351.                 return None
  352.             if linesep:
  353.                 epilogue = [
  354.                     '']
  355.             else:
  356.                 epilogue = []
  357.             for line in self._input:
  358.                 if line is NeedMoreData:
  359.                     yield NeedMoreData
  360.                     continue
  361.                 
  362.                 epilogue.append(line)
  363.             
  364.             if epilogue:
  365.                 firstline = epilogue[0]
  366.                 bolmo = NLCRE_bol.match(firstline)
  367.                 if bolmo:
  368.                     epilogue[0] = firstline[len(bolmo.group(0)):]
  369.                 
  370.             
  371.             self._cur.epilogue = EMPTYSTRING.join(epilogue)
  372.             return None
  373.         lines = []
  374.         for line in self._input:
  375.             lines.append(line)
  376.         
  377.         self._cur.set_payload(EMPTYSTRING.join(lines))
  378.  
  379.     
  380.     def _parse_headers(self, lines):
  381.         lastheader = ''
  382.         lastvalue = []
  383.         for lineno, line in enumerate(lines):
  384.             if line[0] in ' \t':
  385.                 if not lastheader:
  386.                     defect = errors.FirstHeaderLineIsContinuationDefect(line)
  387.                     self._cur.defects.append(defect)
  388.                     continue
  389.                 
  390.                 lastvalue.append(line)
  391.                 continue
  392.             
  393.             if lastheader:
  394.                 lhdr = EMPTYSTRING.join(lastvalue)[:-1].rstrip('\r\n')
  395.                 self._cur[lastheader] = lhdr
  396.                 lastheader = ''
  397.                 lastvalue = []
  398.             
  399.             if line.startswith('From '):
  400.                 if lineno == 0:
  401.                     mo = NLCRE_eol.search(line)
  402.                     if mo:
  403.                         line = line[:-len(mo.group(0))]
  404.                     
  405.                     self._cur.set_unixfrom(line)
  406.                     continue
  407.                 elif lineno == len(lines) - 1:
  408.                     self._input.unreadline(line)
  409.                     return None
  410.                 defect = errors.MisplacedEnvelopeHeaderDefect(line)
  411.                 self._cur.defects.append(defect)
  412.                 continue
  413.             
  414.             i = line.find(':')
  415.             if i < 0:
  416.                 defect = errors.MalformedHeaderDefect(line)
  417.                 self._cur.defects.append(defect)
  418.                 continue
  419.             
  420.             lastheader = line[:i]
  421.             lastvalue = [
  422.                 line[i + 1:].lstrip()]
  423.         
  424.         if lastheader:
  425.             self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n')
  426.         
  427.  
  428.  
  429.